********************************************************************************
* Supplement to TableA1.do
*
* 
* Restrict hospital-year panel to observations with complete data on ALL outcomes
*
********************************************************************************


*******
******* settings  
*******

* directory macros used by this do-file

** output directory: the uniform-sample dataset saved at the end of this script goes here
local fpath_output "/homes/nber/shruthi-dua51934/sacarny-DUA51934/shruthi-dua51934/replication_files/output_20230606"

** directory containing the main analytic file
local fpath_build_output "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/output"

** directory containing the HIMSS supplemental variables
** (path ends in "/"; file names are appended with another "/" further down)
local fpath_himss "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/himss/input/"

** directory containing the physician flows supplemental data
** (path ends in "/"; file names are appended with another "/" further down)
local fpath_physician_flows "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/physician_flows/output/"




*******
******* read the data and create main outcomes 
*******
* Load the main analytic file; this replaces whatever data is in memory.
use "`fpath_build_output'/acq_cleaned_complete_20230606.dta", clear 

** clean up
label var hospbd "Total beds"

* Recode urbancbsa into a rural indicator that is constant within hospital:
*   1. urban2 = within-id maximum of urbancbsa
*   2. urbancbsa = 1 - urban2
* NOTE(review): this presumes urbancbsa is a 0/1 urban flag -- confirm.
* The helper variable urban2 is left in the dataset.
bys id: egen urban2 = max(urbancbsa)
replace urbancbsa = urban2 
replace urbancbsa = 1 - urbancbsa 
label var urbancbsa "Share rural hospitals"

* drop observations with a missing hrrcode
drop if missing(hrrcode)

* numeric variable for the aha id 
egen id2 = group(id)

* total cost per adjusted discharge, divided by 1,000,000.
* Denominator: ipdischarges_adultped scaled by (1 + opcharge/ipcharge).
gen adj_costs = totcost / ((1000000)*ipdischarges_adultped*(1 + (opcharge/ipcharge)))
label var adj_costs "Adjusted costs per IP discharge (\$1 millions)" 
* winsorize at the 5th/95th percentiles within year; result is adj_costs_w
winsor2 adj_costs, cuts(05 95) by(year)

* total revenue per adjusted discharge (same denominator as adj_costs)
gen adj_revenue = income / ((1000000)*ipdischarges_adultped*(1 + (opcharge/ipcharge)))
label var adj_revenue "Adjusted revenue per IP discharge (\$1 millions)"
* winsorize at the 5th/95th percentiles within year; result is adj_revenue_w
winsor2 adj_revenue, cuts(05 95) by(year)


* log of the winsorized costs and revenue 
* NOTE(review): both label texts say "/bed" although the underlying variables
* are per adjusted IP discharge; wording left unchanged pending confirmation.
gen ladjcosts_w = log((adj_costs_w))
label var ladjcosts_w "Log(costs/bed)"

gen ladjrev_w = log((adj_revenue_w))
* Label the revenue variable itself. Previously this line re-labelled
* ladjcosts_w, overwriting the cost label and leaving ladjrev_w unlabelled.
label var ladjrev_w "Log(revenue/bed)"

* price: winsorize within year (creates dafny_price_w), then take logs
winsor2 dafny_price, cuts(05 95) by(year)
gen ldafny_price05 = log(dafny_price_w)
label var ldafny_price05 "log(Dafny price index)"

* winsorize profit margin in each year
winsor2 profit_margin, cuts(05 95) by(year) label

* capital investment: rescale by 1,000,000 in place
replace capinv_tot = capinv_tot/1000000
label var capinv_tot "Capital investment"
* per-bed version uses the rescaled capinv_tot
gen capinv_tot_per_bed = capinv_tot / hospbd
label var capinv_tot_per_bed "Capital investment per bed"
* winsorize and log capital investment; log(1 + x) keeps zero investment defined
winsor2 capinv_tot, cuts(05 95) by(year) label
gen lw_capinv_tot = log(1+capinv_tot_w)
label var lw_capinv_tot "log(1+winsorized capital investment)"


* scale FTE counts by beds 
gen fte_per_bed = fte/hospbd
label var fte_per_bed  "Full time employees per bed"

* create log(fte); missing when fte is zero or negative
gen logfte = log(fte)
label var logfte "log(FTE)"
label var fte "Full Time Employees"



********
******** sample and treatment indicators for the difference-in-difference analysis
******** 
* Sample restriction: keep for-profit hospitals and exclude observations
* flagged target2 == 1 or acq_other == 1.
keep if forprofit == 1 & target2 != 1 & acq_other != 1

* sanity check: neither exclusion flag may remain set
* (the assertion also fails if either flag is missing)
assert target2 + acq_other == 0 

label variable ind08_11acq_legacy "Acquirer * 08-11"
label variable ind12_14target "Target * 12-14"

* post-treatment indicator (2008 onward) and its interactions with the
* target and acquirer (acq_legacy) flags
generate postm = year >= 2008
generate postm_target = target * postm
generate postm_legacy = acq_legacy * postm

* interim indicator (2007 only) and its interactions with the same flags
generate interim = year == 2007
generate interim_target = target * interim
generate interim_legacy = acq_legacy * interim

label variable postm_target "Post 2008 * Target"
label variable postm_legacy "Post 2008 * Acquirer"
label variable interim_target "Target 2007"
label variable interim_legacy "Acquirer 2007"

* event study
* generate year dummies: yr1, yr2, ... one per distinct value of year,
* in ascending order of year
tab year, gen(yr)

* Interact the year dummies with the target / acq_legacy indicators and label
* each interaction with the calendar year it stands for.
* The year is taken from the distinct values of year (levelsof lists them in
* ascending order, matching the yr# numbering from tab), so labels stay
* correct even if a year is absent from the restricted sample. Variable names
* and creation order (target_yr1, legacy_yr1, target_yr2, ...) are unchanged.
levelsof year, local(year_levels)
local k = 0
foreach y of local year_levels {
	local ++k
	gen target_yr`k' = target * yr`k'
	gen legacy_yr`k' = acq_legacy * yr`k'
	label var target_yr`k' "Target * `y'"
	label var legacy_yr`k' "Acquirer * `y'"
}

* for event studies only; drop dummy for 2006 (t-1)
* Guard: stop if yr4 is not the 2006 dummy, since dropping a different year
* would silently change the omitted reference period.
assert year == 2006 if yr4 == 1
* the wildcard removes yr4, target_yr4 and legacy_yr4
drop *yr4

* gen year * hrr FEs: one group id per (year, hrrcode) combination
egen yrhrr = group(year hrrcode)

* gen year * HRR TREND
* NOTE(review): this is the arithmetic product of year and the numeric
* hrrcode, not a separate linear trend per HRR -- confirm how it is used
* downstream before relying on it.
gen yr_hrr_trend = year * hrrcode





******
****** uniform sample: require non-missing price, capital investment and FTE outcomes
******
* missing() with several arguments is true when any one of them is missing
drop if missing(ldafny_price05, lw_capinv_tot, logfte)

* bring in the vendor distance info; only hospital-years found in both files are kept
merge 1:1 id year using "`fpath_himss'/vendor_distance_indiv2003_2014_v2.dta", keep(match) nogenerate

* bring in the physician flows info (selected variables only); again only
* hospital-years found in both files are kept
merge 1:1 id year using "`fpath_physician_flows'/physician_flows_data2003_2014", keepusing(wpat* wcost* *_sh nphyshosp) keep(match) nogenerate

* squared distlegacy and the log of that square
* (distlegacy presumably comes from the vendor distance file -- verify)
generate discvendcount = distlegacy^2
generate ldiscvendcount = log(distlegacy^2)

* write the uniform-sample dataset, overwriting any existing copy
save "`fpath_output'/acq_uniform_sample_20230725", replace 





